# install.packages("devtools")
#
# devtools::install_github("KlausVigo/phangorn")
#
# devtools::install_github("benjjneb/dada2", ref="v1.16") # change the ref argument to get other versions
#
# devtools::install_github("tidyverse/tidyverse")
#
#
# if (!requireNamespace("BiocManager", quietly = TRUE))
# install.packages("BiocManager")
#
# BiocManager::install("ShortRead")
#
# BiocManager::install("DECIPHER")
# Trim amplicon primers from the raw fastq files with atropos prior to
# denoising (presumably 16S V3-V4 primers — confirm against study design).
run_atropos(
  raw_files_path = "/Users/fconstan/Documents/GitHub/metabarcodingRpipeline/test-data/",
  atropos = "/Users/fconstan/miniconda3/envs/qiime2-2020.6/bin/atropos",
  PRIMER_F = "CCTACGGGNGGCWGCAG",
  PRIMER_R = "GACTACHVGGGTATCTAATCC"
)
# Build per-run read-quality profiles from the primer-trimmed reads.
# export = FALSE: results are kept in `qplot` rather than written out
# (presumably — confirm in the function's documentation).
qplot <- run_dada2_qplot(
  cut_dir = "dada2/00_atropos_primer_removed",
  export = FALSE
)
##
## ##You are using DADA2 version 1.16.0
##
## ##You are using tidyverse version 1.3.0.9000
##
## ################################
##
##
## # output dir : dada2/01_dada2_quality_profiles/180914_M00842_0310_000000000-C3GBT
##
## # sample names list starts with :
##
## # output dir : dada2/01_dada2_quality_profiles/190719_M00842_0364_000000000-CKGHM
##
## # sample names list starts with :
qplot
## $fwd_plot
## $fwd_plot$`180914_M00842_0310_000000000-C3GBT`

##
## $fwd_plot$`190719_M00842_0364_000000000-CKGHM`

##
##
## $rev_plot
## $rev_plot$`180914_M00842_0310_000000000-C3GBT`

##
## $rev_plot$`190719_M00842_0364_000000000-CKGHM`

# Filter/trim the reads, learn error rates, denoise with dada2 and merge the
# paired reads, run by run.
filtered <- run_dada2_filter_denoise_merge_reads(
  cut_dir = "dada2/00_atropos_primer_removed",
  trunclen = c(240, 230),      # truncation length: forward, reverse
  minLen = 180,
  maxee = c(2, 3),             # max expected errors: forward, reverse
  nbases = 10,                 # testing purpose only!!
  minover = 15,                # minimum overlap when merging pairs
  nthreads = 6,
  remove_input_fastq = FALSE,
  export = FALSE
)
##
## ##You are using DADA2 version 1.16.0
##
## ##You are using tidyverse version 1.3.0.9000
##
## ################################
##
##
## # output dir : dada2/02_dada2_filtered_denoised_merged/180914_M00842_0310_000000000-C3GBT
##
## # sample names list starts with :
##
## # filterAndTrim
##
## # Filtering and trimming done with the following parameters:
## # Forward pair: trimming at 240 nts and max expected error 2
## # Reverse pair: trimming at 230 nts and max expected error 3
##
##
## # Filtered fastq files were generated in : "dada2/02_dada2_filtered_denoised_merged/180914_M00842_0310_000000000-C3GBT"
##
## # derepFastq
##
## # Dereplication done
##
## # learnErrors
## 4960800 total bases in 20670 reads from 1 samples will be used for learning the error rates.
## 2807610 total bases in 12207 reads from 1 samples will be used for learning the error rates.
##
## # plotErrors
##
## # Errors learnt and ploted
##
## # dada
## Sample 1 - 14951 reads in 3705 unique sequences.
## Sample 2 - 12207 reads in 2320 unique sequences.
## Sample 3 - 20670 reads in 4481 unique sequences.
##
## selfConsist step 2Sample 1 - 14951 reads in 5111 unique sequences.
## Sample 2 - 12207 reads in 3888 unique sequences.
## Sample 3 - 20670 reads in 6216 unique sequences.
##
## selfConsist step 2
## # DADA2 algorithm performed
##
## # mergePairs with defined 15 nt overlap
## # Pairs were merged
## # Number of samples: 3
## # Number of detected variants (ASVs): 706# The variants (ASVs) have the following length distribution:
## # saving seqtab as dada2/02_dada2_filtered_denoised_merged/180914_M00842_0310_000000000-C3GBT/180914_M00842_0310_000000000-C3GBT_seqtab.rds
##
## # Plotting Sequences/ASV distribution to dada2/02_dada2_filtered_denoised_merged/180914_M00842_0310_000000000-C3GBT/seq_distrib_180914_M00842_0310_000000000-C3GBT.pdf
##
##
## # Generating summary
##
## # The distribution of merged kept is the following:
##
## # output dir : dada2/02_dada2_filtered_denoised_merged/190719_M00842_0364_000000000-CKGHM
##
## # sample names list starts with :
##
## # filterAndTrim
##
## # Filtering and trimming done with the following parameters:
## # Forward pair: trimming at 240 nts and max expected error 2
## # Reverse pair: trimming at 230 nts and max expected error 3
##
##
## # Filtered fastq files were generated in : "dada2/02_dada2_filtered_denoised_merged/190719_M00842_0364_000000000-CKGHM"
##
## # derepFastq
##
## # Dereplication done
##
## # learnErrors
## 6480 total bases in 27 reads from 1 samples will be used for learning the error rates.
## 1512940 total bases in 6578 reads from 1 samples will be used for learning the error rates.
##
## # plotErrors
##
## # Errors learnt and ploted
##
## # dada
## Sample 1 - 7172 reads in 2014 unique sequences.
## Sample 2 - 6578 reads in 1486 unique sequences.
## Sample 3 - 27 reads in 20 unique sequences.
##
## selfConsist step 2Sample 1 - 7172 reads in 1904 unique sequences.
## Sample 2 - 6578 reads in 1862 unique sequences.
## Sample 3 - 27 reads in 21 unique sequences.
##
## selfConsist step 2
## # DADA2 algorithm performed
##
## # mergePairs with defined 15 nt overlap
## # Pairs were merged
## # Number of samples: 3
## # Number of detected variants (ASVs): 271# The variants (ASVs) have the following length distribution:
## # saving seqtab as dada2/02_dada2_filtered_denoised_merged/190719_M00842_0364_000000000-CKGHM/190719_M00842_0364_000000000-CKGHM_seqtab.rds
##
## # Plotting Sequences/ASV distribution to dada2/02_dada2_filtered_denoised_merged/190719_M00842_0364_000000000-CKGHM/seq_distrib_190719_M00842_0364_000000000-CKGHM.pdf
##
##
## # Generating summary
##
## # The distribution of merged kept is the following:
# Stack the per-run read-tracking tables and display them interactively.
filtered$track %>%
  bind_rows() %>%
  DT::datatable()
filtered$plot
## $`180914_M00842_0310_000000000-C3GBT`

##
## $`190719_M00842_0364_000000000-CKGHM`

filtered$out_fwd
## $`180914_M00842_0310_000000000-C3GBT`

##
## $`190719_M00842_0364_000000000-CKGHM`

filtered$out_rev
## $`180914_M00842_0310_000000000-C3GBT`

##
## $`190719_M00842_0364_000000000-CKGHM`

# filtered$seqtab
# Merge the per-run sequence tables, remove bimeras, and keep only variants
# between 300 and 450 bp.
# NOTE(review): the name `merge` masks base::merge() for the rest of the
# session; later chunks reference it, so it is kept unchanged here.
merge <- run_dada2_mergeRuns_removeBimeraDenovo(
  trim_length = c(300, 450),
  seqtab = filtered$seqtab,
  track = filtered$track,
  collapseNoMis = FALSE,
  export = FALSE
)
##
## ##You are using DADA2 version 1.16.0
##
## ##You are using tidyverse version 1.3.0.9000
##
## ################################
##
##
## # mergeSequenceTables done
##
## # removeBimeraDenovo start
##
## # removeBimeraDenovo done
## # 261 chimera were found and removed
## # These represent 28.65% of total ASVs and 13.45% of total reads
##
## # The variants (ASVs) have the following length distribution:
##
## # Reads shorter than 300bp and longer than 450bp were removed.
##
## # The variants (ASVs) after length filtering have the following length distribution:
##
## # A total of 99.78% reads were kept after length filtering.
##
##
## # A total of 98.46% ASVs were kept after length filtering.
##
##
## # The distribution of chimera reads kept is the following:
##
## # chimera removal step is done. You can go further into the analysis (taxonomy, phylogeny) or explore collapseNoMismatch IF you are dealing with multiple runs ... it might take very long
# Display the post-chimera-removal tracking table.
DT::datatable(merge$track)
merge$plot

# Assign taxonomy to the chimera-free ASVs (dada method against the SILVA
# v138 training set, with species-level assignment).
tax <- run_dada_DECIPHER_taxonomy(
  seqtab = merge$collapsed_seqtab,
  threshold = 60, # used for DECIPHER and dada2 if outputBootstraps = FALSE
  tryRC = FALSE,
  merged_run_dir = "dada2/03_dada2_merged_runs_chimera_removed",
  db = "~/db/DADA2/silva_nr_v138_train_set.fa.gz", # alt: ~/db/DADA2/GTDB_r89-mod_June2019.RData
  db_species = "~/db/DADA2/silva_species_assignment_v138.fa.gz", # only for dada2 method
  export = FALSE
)
##
## ##You are using DADA2 version 1.16.0
##
## ##You are using tidyverse version 1.3.0.9000
##
##
## ##You are using DECIPHER version 2.16.1
##
## ################################
##
##
## # output dir : dada2/04_dada2_taxonomy
##
## # You have decided to use : dada method against : silva_nr_v138_train_set database
## Finished processing reference fasta.99 out of 320 were assigned to the species level.
## Of which 88 had genera consistent with the input table.
# Peek at the first rows of the merged count + taxonomy table.
head(tax)
## # A tibble: 6 x 21
## ASV_id ASV `R1F1-S66` `R1F2-S300` `R1F3-S90` `Y2A15-2M-06-S7…
## <chr> <chr> <int> <int> <int> <int>
## 1 asv1 TGGG… 2185 2213 54 0
## 2 asv2 TGGG… 10 42 21 2765
## 3 asv3 TGGG… 0 0 1253 0
## 4 asv4 TGGG… 728 734 0 0
## 5 asv5 TGGG… 0 0 172 0
## 6 asv6 TGGG… 0 0 1148 0
## # … with 15 more variables: `Y2A15-2M-12-S77` <int>, `Y3-R1F4-S136` <int>,
## # Kingdom <chr>, Phylum <chr>, Class <chr>, Order <chr>, Family <chr>,
## # Genus <chr>, Species <chr>, Kingdom_Boot <int>, Phylum_Boot <int>,
## # Class_Boot <int>, Order_Boot <int>, Family_Boot <int>, Genus_Boot <int>
# Assemble a phyloseq object from the taxonomy table, the sample metadata
# spreadsheet and the read-tracking information.
ps <- run_merge_phyloseq(
  merged_table = tax,
  metadata = "/Users/fconstan/Documents/GitHub/metabarcodingRpipeline/test-data/metadata.xlsx", # or "none"
  track = merge$track,
  export = FALSE
)
##
## ##You are using DADA2 version 1.16.0
##
## ##You are using tidyverse version 1.3.0.9000
##
##
## ##You are using phyloseq version 1.34.0
##
## ################################
ps
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 320 taxa and 6 samples ]
## sample_data() Sample Data: [ 6 samples by 21 sample variables ]
## tax_table() Taxonomy Table: [ 320 taxa by 7 taxonomic ranks ]
## refseq() DNAStringSet: [ 320 reference sequences ]
# Align the ASV sequences, infer a phylogeny and attach the tree to the
# phyloseq object; output is written under "dada2/phyloseq_phylogeny".
ps_phylo <- add_phylogeny_to_phyloseq(
  ps,
  export = "dada2/phyloseq_phylogeny"
)
## Determining distance matrix based on shared 8-mers:
## ================================================================================
##
## Time difference of 1.28 secs
##
## Clustering into groups by similarity:
## ================================================================================
##
## Time difference of 0.05 secs
##
## Aligning Sequences:
## ================================================================================
##
## Time difference of 1.95 secs
##
## Iteration 1 of 2:
##
## Determining distance matrix based on alignment:
## ================================================================================
##
## Time difference of 0.07 secs
##
## Reclustering into groups by similarity:
## ================================================================================
##
## Time difference of 0.05 secs
##
## Realigning Sequences:
## ================================================================================
##
## Time difference of 1.1 secs
##
## Iteration 2 of 2:
##
## Determining distance matrix based on alignment:
## ================================================================================
##
## Time difference of 0.08 secs
##
## Reclustering into groups by similarity:
## ================================================================================
##
## Time difference of 0.05 secs
##
## Realigning Sequences:
## ================================================================================
##
## Time difference of 0.24 secs
##
## Refining the alignment:
## ================================================================================
##
## Time difference of 0.04 secs
ps_phylo
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 320 taxa and 6 samples ]
## sample_data() Sample Data: [ 6 samples by 21 sample variables ]
## tax_table() Taxonomy Table: [ 320 taxa by 7 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 320 tips and 319 internal nodes ]
## refseq() DNAStringSet: [ 320 reference sequences ]
# Re-assign taxonomy with the DECIPHER/IDTAXA method against SILVA r132.
# NOTE(review): despite its name, `ps_phylo_GTDB_decipher` holds a
# SILVA-based assignment here — the GTDB database is only the commented
# alternative.
ps_phylo_GTDB_decipher <- phyloseq_DECIPHER_tax(
  physeq = ps_phylo,
  threshold = 60, # 60 (very high), 50 (high), PB = 10
  db = "~/db/DADA2/SILVA_SSU_r132_March2018.RData", # alt: ~/db/DADA2/GTDB_r95-mod_August2020.RData
  nthreads = 8,
  return = TRUE
)
##
## # Running DECIPHER method against : SILVA_SSU_r132_March2018 database with threshold value = 60
## ================================================================================
##
## Time difference of 211.35 secs
ps_phylo_GTDB_decipher
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 320 taxa and 6 samples ]
## sample_data() Sample Data: [ 6 samples by 21 sample variables ]
## tax_table() Taxonomy Table: [ 320 taxa by 7 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 320 tips and 319 internal nodes ]
## refseq() DNAStringSet: [ 320 reference sequences ]
#
# Show the DECIPHER-based OTU/taxonomy table, dropping the (long) ASV
# sequences for readability.
# NOTE(review): "phloseq_export_otu_tax" looks like a typo for "phyloseq_…"
# but matches the pipeline's exported function name — confirm upstream.
ps_phylo_GTDB_decipher %>%
  phloseq_export_otu_tax() %>%
  select(-ASV_sequence) %>%
  DT::datatable()
# Alternative taxonomy: dada2 method against SILVA nr99 v138, including
# species-level assignment; full_return = TRUE also returns the raw table.
ps_phylo_dada_silva_138 <- phyloseq_dada2_tax(
  physeq = ps_phylo,
  threshold = 60,
  db = "~/db/DADA2/silva_nr99_v138_train_set.fa.gz",
  db_species = "~/db/DADA2/silva_species_assignment_v138.fa.gz",
  full_return = TRUE
)
##
## # Running dada2 method against : silva_nr99_v138_train_set database with threshold value = 60
## Finished processing reference fasta.99 out of 320 were assigned to the species level.
## Of which 83 had genera consistent with the input table.
# Display the full dada2/SILVA assignment table.
DT::datatable(ps_phylo_dada_silva_138$full_table)
# Show the dada2/SILVA OTU/taxonomy table without the ASV sequences.
ps_phylo_dada_silva_138$physeq %>%
  phloseq_export_otu_tax() %>%
  select(-ASV_sequence) %>%
  DT::datatable()
# Side-by-side comparison of the two taxonomy assignments, joined on the
# "ASV" column: "_d_s" = dada2/SILVA columns, "_D_g" = DECIPHER-run columns.
ps_phylo_dada_silva_138$physeq %>%
  phloseq_export_otu_tax() %>%
  select(-ASV_sequence) %>%
  dplyr::select(-sample_names(ps_phylo_dada_silva_138$physeq)) %>% # drop the per-sample count columns
  left_join(
    ps_phylo_GTDB_decipher %>%
      phloseq_export_otu_tax(),
    by = "ASV",
    suffix = c("_d_s", "_D_g")
  ) %>%
  select(-contains(c("Kingdom", "Phylum", "Class", "length"))) %>%
  # select(-ASV_sequence) %>%
  DT::datatable()
# Cluster ASVs at 97% similarity with DECIPHER.
ps_phylo_clust97 <- phyloseq_DECIPHER_cluster_ASV(
  ps_phylo,
  threshold = 97,
  showPlot = TRUE
)
## Determining distance matrix based on shared 8-mers:
## ================================================================================
##
## Time difference of 1.27 secs
##
## Clustering into groups by similarity:
## ================================================================================
##
## Time difference of 0.05 secs
##
## Aligning Sequences:
## ================================================================================
##
## Time difference of 1.15 secs
##
## Iteration 1 of 2:
##
## Determining distance matrix based on alignment:
## ================================================================================
##
## Time difference of 0.07 secs
##
## Reclustering into groups by similarity:
## ================================================================================
##
## Time difference of 0.05 secs
##
## Realigning Sequences:
## ================================================================================
##
## Time difference of 0.76 secs
##
## Iteration 2 of 2:
##
## Determining distance matrix based on alignment:
## ================================================================================
##
## Time difference of 0.07 secs
##
## Reclustering into groups by similarity:
## ================================================================================
##
## Time difference of 0.04 secs
##
## Realigning Sequences:
## ================================================================================
##
## Time difference of 0.23 secs
##
## Refining the alignment:
## ================================================================================
##
## Time difference of 0.04 secs
##
## ================================================================================
##
## Time difference of 0.07 secs
##
## ================================================================================

##
## Time difference of 0.24 secs
ps_phylo_clust97$physeq_clustered
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 234 taxa and 6 samples ]:
## sample_data() Sample Data: [ 6 samples by 21 sample variables ]:
## tax_table() Taxonomy Table: [ 234 taxa by 7 taxonomic ranks ]:
## phy_tree() Phylogenetic Tree: [ 234 tips and 233 internal nodes ]:
## refseq() DNAStringSet: [ 234 reference sequences ]
## taxa are rows
# Display the OTU/taxonomy table of the 97%-clustered object.
DT::datatable(phloseq_export_otu_tax(ps_phylo_clust97$physeq_clustered))
# Cluster ASVs at 100% similarity (collapses only identical/overlapping
# variants) with DECIPHER.
ps_phylo_clust100 <- phyloseq_DECIPHER_cluster_ASV(
  ps_phylo,
  threshold = 100,
  showPlot = TRUE
)
## Determining distance matrix based on shared 8-mers:
## ================================================================================
##
## Time difference of 1.28 secs
##
## Clustering into groups by similarity:
## ================================================================================
##
## Time difference of 0.05 secs
##
## Aligning Sequences:
## ================================================================================
##
## Time difference of 1.14 secs
##
## Iteration 1 of 2:
##
## Determining distance matrix based on alignment:
## ================================================================================
##
## Time difference of 0.07 secs
##
## Reclustering into groups by similarity:
## ================================================================================
##
## Time difference of 0.05 secs
##
## Realigning Sequences:
## ================================================================================
##
## Time difference of 0.73 secs
##
## Iteration 2 of 2:
##
## Determining distance matrix based on alignment:
## ================================================================================
##
## Time difference of 0.07 secs
##
## Reclustering into groups by similarity:
## ================================================================================
##
## Time difference of 0.08 secs
##
## Realigning Sequences:
## ================================================================================
##
## Time difference of 0.23 secs
##
## Refining the alignment:
## ================================================================================
##
## Time difference of 0.04 secs
##
## ================================================================================
##
## Time difference of 0.07 secs
##
## ================================================================================

##
## Time difference of 0.24 secs
ps_phylo_clust100$physeq_clustered
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 319 taxa and 6 samples ]:
## sample_data() Sample Data: [ 6 samples by 21 sample variables ]:
## tax_table() Taxonomy Table: [ 319 taxa by 7 taxonomic ranks ]:
## phy_tree() Phylogenetic Tree: [ 319 tips and 318 internal nodes ]:
## refseq() DNAStringSet: [ 319 reference sequences ]
## taxa are rows
# Display the OTU/taxonomy table of the 100%-clustered object.
DT::datatable(phloseq_export_otu_tax(ps_phylo_clust100$physeq_clustered))
# Post-clustering curation of the ASV table with vsearch + LULU;
# full_return = TRUE also returns LULU's raw curation result.
lulu_out <- phyloseq_vsearch_lulu_cluster_ASV(
  ps_phylo,
  return = TRUE,
  full_return = TRUE
)
## [1] "progress: 0%"
## [1] "progress: 1%"
## [1] "progress: 1%"
## [1] "progress: 1%"
## [1] "progress: 2%"
## [1] "progress: 2%"
## [1] "progress: 2%"
## [1] "progress: 2%"
## [1] "progress: 3%"
## [1] "progress: 3%"
## [1] "progress: 3%"
## [1] "progress: 4%"
## [1] "progress: 4%"
## [1] "progress: 4%"
## [1] "progress: 5%"
## [1] "progress: 5%"
## [1] "progress: 5%"
## [1] "progress: 6%"
## [1] "progress: 6%"
## [1] "progress: 6%"
## [1] "progress: 7%"
## [1] "progress: 7%"
## [1] "progress: 7%"
## [1] "progress: 8%"
## [1] "progress: 8%"
## [1] "progress: 8%"
## [1] "progress: 8%"
## [1] "progress: 9%"
## [1] "progress: 9%"
## [1] "progress: 9%"
## [1] "progress: 10%"
## [1] "progress: 10%"
## [1] "progress: 10%"
## [1] "progress: 11%"
## [1] "progress: 11%"
## [1] "progress: 11%"
## [1] "progress: 12%"
## [1] "progress: 12%"
## [1] "progress: 12%"
## [1] "progress: 12%"
## [1] "progress: 13%"
## [1] "progress: 13%"
## [1] "progress: 13%"
## [1] "progress: 14%"
## [1] "progress: 14%"
## [1] "progress: 14%"
## [1] "progress: 15%"
## [1] "progress: 15%"
## [1] "progress: 15%"
## [1] "progress: 16%"
## [1] "progress: 16%"
## [1] "progress: 16%"
## [1] "progress: 17%"
## [1] "progress: 17%"
## [1] "progress: 17%"
## [1] "progress: 18%"
## [1] "progress: 18%"
## [1] "progress: 18%"
## [1] "progress: 18%"
## [1] "progress: 19%"
## [1] "progress: 19%"
## [1] "progress: 19%"
## [1] "progress: 20%"
## [1] "progress: 20%"
## [1] "progress: 20%"
## [1] "progress: 21%"
## [1] "progress: 21%"
## [1] "progress: 21%"
## [1] "progress: 22%"
## [1] "progress: 22%"
## [1] "progress: 22%"
## [1] "progress: 22%"
## [1] "progress: 23%"
## [1] "progress: 23%"
## [1] "progress: 23%"
## [1] "progress: 24%"
## [1] "progress: 24%"
## [1] "progress: 24%"
## [1] "progress: 25%"
## [1] "progress: 25%"
## [1] "progress: 25%"
## [1] "progress: 26%"
## [1] "progress: 26%"
## [1] "progress: 26%"
## [1] "progress: 27%"
## [1] "progress: 27%"
## [1] "progress: 27%"
## [1] "progress: 28%"
## [1] "progress: 28%"
## [1] "progress: 28%"
## [1] "progress: 28%"
## [1] "progress: 29%"
## [1] "progress: 29%"
## [1] "progress: 29%"
## [1] "progress: 30%"
## [1] "progress: 30%"
## [1] "progress: 30%"
## [1] "progress: 31%"
## [1] "progress: 31%"
## [1] "progress: 31%"
## [1] "progress: 32%"
## [1] "progress: 32%"
## [1] "progress: 32%"
## [1] "progress: 32%"
## [1] "progress: 33%"
## [1] "progress: 33%"
## [1] "progress: 33%"
## [1] "progress: 34%"
## [1] "progress: 34%"
## [1] "progress: 34%"
## [1] "progress: 35%"
## [1] "progress: 35%"
## [1] "progress: 35%"
## [1] "progress: 36%"
## [1] "progress: 36%"
## [1] "progress: 36%"
## [1] "progress: 37%"
## [1] "progress: 37%"
## [1] "progress: 37%"
## [1] "progress: 38%"
## [1] "progress: 38%"
## [1] "progress: 38%"
## [1] "progress: 38%"
## [1] "progress: 39%"
## [1] "progress: 39%"
## [1] "progress: 39%"
## [1] "progress: 40%"
## [1] "progress: 40%"
## [1] "progress: 40%"
## [1] "progress: 41%"
## [1] "progress: 41%"
## [1] "progress: 41%"
## [1] "progress: 42%"
## [1] "progress: 42%"
## [1] "progress: 42%"
## [1] "progress: 42%"
## [1] "progress: 43%"
## [1] "progress: 43%"
## [1] "progress: 43%"
## [1] "progress: 44%"
## [1] "progress: 44%"
## [1] "progress: 44%"
## [1] "progress: 45%"
## [1] "progress: 45%"
## [1] "progress: 45%"
## [1] "progress: 46%"
## [1] "progress: 46%"
## [1] "progress: 46%"
## [1] "progress: 47%"
## [1] "progress: 47%"
## [1] "progress: 47%"
## [1] "progress: 48%"
## [1] "progress: 48%"
## [1] "progress: 48%"
## [1] "progress: 48%"
## [1] "progress: 49%"
## [1] "progress: 49%"
## [1] "progress: 49%"
## [1] "progress: 50%"
## [1] "progress: 50%"
## [1] "progress: 50%"
## [1] "progress: 51%"
## [1] "progress: 51%"
## [1] "progress: 51%"
## [1] "progress: 52%"
## [1] "progress: 52%"
## [1] "progress: 52%"
## [1] "progress: 52%"
## [1] "progress: 53%"
## [1] "progress: 53%"
## [1] "progress: 53%"
## [1] "progress: 54%"
## [1] "progress: 54%"
## [1] "progress: 54%"
## [1] "progress: 55%"
## [1] "progress: 55%"
## [1] "progress: 55%"
## [1] "progress: 56%"
## [1] "progress: 56%"
## [1] "progress: 56%"
## [1] "progress: 57%"
## [1] "progress: 57%"
## [1] "progress: 57%"
## [1] "progress: 57%"
## [1] "progress: 58%"
## [1] "progress: 58%"
## [1] "progress: 58%"
## [1] "progress: 59%"
## [1] "progress: 59%"
## [1] "progress: 59%"
## [1] "progress: 60%"
## [1] "progress: 60%"
## [1] "progress: 60%"
## [1] "progress: 61%"
## [1] "progress: 61%"
## [1] "progress: 61%"
## [1] "progress: 62%"
## [1] "progress: 62%"
## [1] "progress: 62%"
## [1] "progress: 62%"
## [1] "progress: 63%"
## [1] "progress: 63%"
## [1] "progress: 63%"
## [1] "progress: 64%"
## [1] "progress: 64%"
## [1] "progress: 64%"
## [1] "progress: 65%"
## [1] "progress: 65%"
## [1] "progress: 65%"
## [1] "progress: 66%"
## [1] "progress: 66%"
## [1] "progress: 66%"
## [1] "progress: 67%"
## [1] "progress: 67%"
## [1] "progress: 67%"
## [1] "progress: 68%"
## [1] "progress: 68%"
## [1] "progress: 68%"
## [1] "progress: 68%"
## [1] "progress: 69%"
## [1] "progress: 69%"
## [1] "progress: 69%"
## [1] "progress: 70%"
## [1] "progress: 70%"
## [1] "progress: 70%"
## [1] "progress: 71%"
## [1] "progress: 71%"
## [1] "progress: 71%"
## [1] "progress: 72%"
## [1] "progress: 72%"
## [1] "progress: 72%"
## [1] "progress: 72%"
## [1] "progress: 73%"
## [1] "progress: 73%"
## [1] "progress: 73%"
## [1] "progress: 74%"
## [1] "progress: 74%"
## [1] "progress: 74%"
## [1] "progress: 75%"
## [1] "progress: 75%"
## [1] "progress: 75%"
## [1] "progress: 76%"
## [1] "progress: 76%"
## [1] "progress: 76%"
## [1] "progress: 77%"
## [1] "progress: 77%"
## [1] "progress: 77%"
## [1] "progress: 78%"
## [1] "progress: 78%"
## [1] "progress: 78%"
## [1] "progress: 78%"
## [1] "progress: 79%"
## [1] "progress: 79%"
## [1] "progress: 79%"
## [1] "progress: 80%"
## [1] "progress: 80%"
## [1] "progress: 80%"
## [1] "progress: 81%"
## [1] "progress: 81%"
## [1] "progress: 81%"
## [1] "progress: 82%"
## [1] "progress: 82%"
## [1] "progress: 82%"
## [1] "progress: 82%"
## [1] "progress: 83%"
## [1] "progress: 83%"
## [1] "progress: 83%"
## [1] "progress: 84%"
## [1] "progress: 84%"
## [1] "progress: 84%"
## [1] "progress: 85%"
## [1] "progress: 85%"
## [1] "progress: 85%"
## [1] "progress: 86%"
## [1] "progress: 86%"
## [1] "progress: 86%"
## [1] "progress: 87%"
## [1] "progress: 87%"
## [1] "progress: 87%"
## [1] "progress: 88%"
## [1] "progress: 88%"
## [1] "progress: 88%"
## [1] "progress: 88%"
## [1] "progress: 89%"
## [1] "progress: 89%"
## [1] "progress: 89%"
## [1] "progress: 90%"
## [1] "progress: 90%"
## [1] "progress: 90%"
## [1] "progress: 91%"
## [1] "progress: 91%"
## [1] "progress: 91%"
## [1] "progress: 92%"
## [1] "progress: 92%"
## [1] "progress: 92%"
## [1] "progress: 92%"
## [1] "progress: 93%"
## [1] "progress: 93%"
## [1] "progress: 93%"
## [1] "progress: 94%"
## [1] "progress: 94%"
## [1] "progress: 94%"
## [1] "progress: 95%"
## [1] "progress: 95%"
## [1] "progress: 95%"
## [1] "progress: 96%"
## [1] "progress: 96%"
## [1] "progress: 96%"
## [1] "progress: 97%"
## [1] "progress: 97%"
## [1] "progress: 97%"
## [1] "progress: 98%"
## [1] "progress: 98%"
## [1] "progress: 98%"
## [1] "progress: 98%"
## [1] "progress: 99%"
## [1] "progress: 99%"
## [1] "progress: 99%"
## [1] "progress: 100%"
## [1] "progress: 100%"
## saved object and output to ~/
lulu_out$physeq_curated
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 319 taxa and 6 samples ]:
## sample_data() Sample Data: [ 6 samples by 21 sample variables ]:
## tax_table() Taxonomy Table: [ 319 taxa by 7 taxonomic ranks ]:
## phy_tree() Phylogenetic Tree: [ 319 tips and 318 internal nodes ]:
## refseq() DNAStringSet: [ 319 reference sequences ]
## taxa are rows
# Inspect the structure of LULU's raw curation result.
str(lulu_out$curated_result)
## List of 10
## $ curated_table :'data.frame': 319 obs. of 6 variables:
## ..$ R1F1-S66 : int [1:319] 2185 10 0 728 0 0 0 0 0 0 ...
## ..$ R1F2-S300 : int [1:319] 2213 42 0 734 0 0 25 0 0 0 ...
## ..$ R1F3-S90 : int [1:319] 54 21 1253 0 172 1148 18 302 358 0 ...
## ..$ Y2A15-2M-06-S78: int [1:319] 0 2765 0 0 0 0 579 0 0 420 ...
## ..$ Y2A15-2M-12-S77: int [1:319] 0 65 213 0 1235 221 16 198 94 0 ...
## ..$ Y3-R1F4-S136 : int [1:319] 0 0 0 0 0 0 0 0 0 0 ...
## $ curated_count : int 319
## $ curated_otus : chr [1:319] "ASV001" "ASV002" "ASV003" "ASV004" ...
## $ discarded_count : int 1
## $ discarded_otus : chr "ASV191"
## $ runtime : 'difftime' num 0.104357004165649
## ..- attr(*, "units")= chr "secs"
## $ minimum_match : num 84
## $ minimum_relative_cooccurence: num 0.95
## $ otu_map :'data.frame': 320 obs. of 5 variables:
## ..$ total : num [1:320] 2903 334 638 210 62 ...
## ..$ spread : num [1:320] 5 5 4 4 4 4 3 3 3 3 ...
## ..$ parent_id: chr [1:320] "ASV002" "ASV012" "ASV007" "ASV020" ...
## ..$ curated : chr [1:320] "parent" "parent" "parent" "parent" ...
## ..$ rank : num [1:320] 2 12 7 20 42 64 1 13 14 17 ...
## $ original_table :'data.frame': 320 obs. of 6 variables:
## ..$ R1F1-S66 : int [1:320] 10 6 0 10 9 8 2185 0 0 19 ...
## ..$ R1F2-S300 : int [1:320] 42 5 25 0 7 4 2213 0 0 0 ...
## ..$ R1F3-S90 : int [1:320] 21 172 18 40 24 0 54 196 86 0 ...
## ..$ Y2A15-2M-06-S78: int [1:320] 2765 113 579 62 0 12 0 71 0 238 ...
## ..$ Y2A15-2M-12-S77: int [1:320] 65 38 16 98 22 12 0 49 207 9 ...
## ..$ Y3-R1F4-S136 : int [1:320] 0 0 0 0 0 0 0 0 7 0 ...
# Browse LULU's OTU map (parent/daughter relationships per ASV).
DT::datatable(lulu_out$curated_result$otu_map)
sessionInfo()
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Mojave 10.14.6
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats4 parallel stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] lulu_0.1.0 speedyseq_0.5.3.9001
## [3] DECIPHER_2.16.1 ape_5.4-1
## [5] RSQLite_2.2.3 phyloseq_1.34.0
## [7] ShortRead_1.46.0 GenomicAlignments_1.24.0
## [9] SummarizedExperiment_1.20.0 Biobase_2.50.0
## [11] MatrixGenerics_1.2.0 matrixStats_0.58.0
## [13] Rsamtools_2.4.0 GenomicRanges_1.42.0
## [15] GenomeInfoDb_1.26.2 Biostrings_2.58.0
## [17] XVector_0.30.0 IRanges_2.24.1
## [19] S4Vectors_0.28.1 BiocParallel_1.24.1
## [21] BiocGenerics_0.36.0 dada2_1.16.0
## [23] Rcpp_1.0.6 forcats_0.5.1
## [25] stringr_1.4.0 dplyr_1.0.3
## [27] purrr_0.3.4 readr_1.4.0
## [29] tidyr_1.1.2 tibble_3.0.6
## [31] ggplot2_3.3.3 tidyverse_1.3.0.9000
##
## loaded via a namespace (and not attached):
## [1] colorspace_2.0-0 hwriter_1.3.2 ellipsis_0.3.1
## [4] rprojroot_2.0.2 fs_1.5.0 rstudioapi_0.13
## [7] farver_2.0.3 bit64_4.0.5 DT_0.17
## [10] fansi_0.4.2 lubridate_1.7.9.2 xml2_1.3.2
## [13] splines_4.0.3 codetools_0.2-18 cachem_1.0.1
## [16] knitr_1.31 ade4_1.7-16 jsonlite_1.7.2
## [19] broom_0.7.4 cluster_2.1.0 dbplyr_2.0.0
## [22] png_0.1-7 compiler_4.0.3 httr_1.4.2
## [25] backports_1.2.1 fastmap_1.1.0 assertthat_0.2.1
## [28] Matrix_1.3-2 cli_2.3.0 htmltools_0.5.1.1
## [31] prettyunits_1.1.1 tools_4.0.3 igraph_1.2.6
## [34] gtable_0.3.0 glue_1.4.2 GenomeInfoDbData_1.2.4
## [37] reshape2_1.4.4 fastmatch_1.1-0 cellranger_1.1.0
## [40] vctrs_0.3.6 rhdf5filters_1.2.0 multtest_2.46.0
## [43] nlme_3.1-151 iterators_1.0.13 crosstalk_1.1.1
## [46] xfun_0.20 openxlsx_4.2.3 rvest_0.3.6
## [49] lifecycle_0.2.0 phangorn_2.5.5 zlibbioc_1.36.0
## [52] MASS_7.3-53 scales_1.1.1 hms_1.0.0
## [55] biomformat_1.18.0 rhdf5_2.34.0 RColorBrewer_1.1-2
## [58] yaml_2.2.1 memoise_2.0.0 latticeExtra_0.6-29
## [61] stringi_1.5.3 highr_0.8 foreach_1.5.1
## [64] permute_0.9-5 zip_2.1.1 rlang_0.4.10
## [67] pkgconfig_2.0.3 bitops_1.0-6 evaluate_0.14
## [70] lattice_0.20-41 Rhdf5lib_1.12.0 labeling_0.4.2
## [73] htmlwidgets_1.5.3 bit_4.0.4 tidyselect_1.1.0
## [76] here_1.0.1 plyr_1.8.6 magrittr_2.0.1
## [79] R6_2.5.0 generics_0.1.0 DelayedArray_0.16.1
## [82] DBI_1.1.1 mgcv_1.8-33 pillar_1.4.7
## [85] haven_2.3.1 withr_2.4.1 survival_3.2-7
## [88] RCurl_1.98-1.2 modelr_0.1.8 crayon_1.4.0
## [91] utf8_1.1.4 rmarkdown_2.6 jpeg_0.1-8.1
## [94] progress_1.2.2 grid_4.0.3 readxl_1.3.1
## [97] data.table_1.13.6 blob_1.2.1 vegan_2.5-7
## [100] reprex_1.0.0 digest_0.6.27 RcppParallel_5.0.2
## [103] munsell_0.5.0 quadprog_1.5-8